{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing different schemes for encoding ids"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "const convertHrtime = require('convert-hrtime');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "const d3array = require('d3-array')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Timing code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export function timeit(n: number, f: any, args: any[]) {\n",
    "    let sum = 0.0;\n",
    "    for (i=0; i<n; i++) {\n",
    "        start = process.hrtime();\n",
    "        f.apply(null, args);\n",
    "        end = process.hrtime(start);\n",
    "        sum += convertHrtime(end).milliseconds;\n",
    "    }\n",
    "    return sum/n;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ID generation functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "namespace Private {\n",
    "\n",
    "  export\n",
    "  function createTriplet(path: number, version: number, store: number): string {\n",
    "    // Split the path into 16-bit values.\n",
    "    let pc = path & 0xFFFF;\n",
    "    let pb = (((path - pc) / 0x10000) | 0) & 0xFFFF;\n",
    "    let pa = (((path - pb - pc) / 0x100000000) | 0) & 0xFFFF;\n",
    "\n",
    "    // Split the version into 16-bit values.\n",
    "    let vc = version & 0xFFFF;\n",
    "    let vb = (((version - vc) / 0x10000) | 0) & 0xFFFF;\n",
    "    let va = (((version - vb - vc) / 0x100000000) | 0) & 0xFFFF;\n",
    "\n",
    "    // Split the store id into 16-bit values.\n",
    "    let sb = store & 0xFFFF;\n",
    "    let sa = (((store - sb) / 0x10000) | 0) & 0xFFFF;\n",
    "\n",
    "    // Convert the parts into a string identifier triplet.\n",
    "    return String.fromCharCode(pa, pb, pc, va, vb, vc, sa, sb);\n",
    "  }\n",
    "\n",
    "  export\n",
    "  function idTripletCount(id: string): number {\n",
    "    return id.length >> 3;\n",
    "  }\n",
    "\n",
    "  export\n",
    "  function idPathAt(id: string, i: number): number {\n",
    "    let j = i << 3;\n",
    "    let a = id.charCodeAt(j + 0);\n",
    "    let b = id.charCodeAt(j + 1);\n",
    "    let c = id.charCodeAt(j + 2);\n",
    "    return a * 0x100000000 + b * 0x10000 + c;\n",
    "  }\n",
    "\n",
    "  export\n",
    "  function idVersionAt(id: string, i: number): number {\n",
    "    let j = i << 3;\n",
    "    let a = id.charCodeAt(j + 3);\n",
    "    let b = id.charCodeAt(j + 4);\n",
    "    let c = id.charCodeAt(j + 5);\n",
    "    return a * 0x100000000 + b * 0x10000 + c;\n",
    "  }\n",
    "\n",
    "  export\n",
    "  function idStoreAt(id: string, i: number): number {\n",
    "    let j = i << 3;\n",
    "    let a = id.charCodeAt(j + 6);\n",
    "    let b = id.charCodeAt(j + 7);\n",
    "    return a * 0x10000 + b;\n",
    "  }\n",
    "\n",
    "  export\n",
    "  function randomPath(min: number, max: number): number {\n",
    "    return min + Math.round(Math.random() * Math.sqrt(max - min));\n",
    "  }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export\n",
    "function createDuplexId(version: number, store: number): string {\n",
    "  // Split the version into 16-bit values.\n",
    "  let vc = version & 0xFFFF;\n",
    "  let vb = (((version - vc) / 0x10000) | 0) & 0xFFFF;\n",
    "  let va = (((version - vb - vc) / 0x100000000) | 0) & 0xFFFF;\n",
    "\n",
    "  // Split the store id into 16-bit values.\n",
    "  let sb = store & 0xFFFF;\n",
    "  let sa = (((store - sb) / 0x10000) | 0) & 0xFFFF;\n",
    "\n",
    "  // Convert the parts into a string identifier duplex.\n",
    "  return String.fromCharCode(va, vb, vc, sa, sb);\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export\n",
    "function createTriplexId(version: number, store: number, lower: string, upper: string): string {\n",
    "  // The maximum path in a triplex id.\n",
    "  const MAX_PATH = 0xFFFFFFFFFFFF;\n",
    "\n",
    "  // Set up the variable to hold the id.\n",
    "  let id = '';\n",
    "\n",
    "  // Fetch the triplet counts of the ids.\n",
    "  let lowerCount = lower ? Private.idTripletCount(lower) : 0;\n",
    "  let upperCount = upper ? Private.idTripletCount(upper) : 0;\n",
    "\n",
    "  // Iterate over the id triplets.\n",
    "  for (let i = 0, n = Math.max(lowerCount, upperCount); i < n; ++i) {\n",
    "    // Fetch the lower identifier triplet, padding as needed.\n",
    "    let lp: number;\n",
    "    let lc: number;\n",
    "    let ls: number;\n",
    "    if (i >= lowerCount) {\n",
    "      lp = 0;\n",
    "      lc = 0;\n",
    "      ls = 0;\n",
    "    } else {\n",
    "      lp = Private.idPathAt(lower, i);\n",
    "      lc = Private.idVersionAt(lower, i);\n",
    "      ls = Private.idStoreAt(lower, i);\n",
    "    }\n",
    "\n",
    "    // Fetch the upper identifier triplet, padding as needed.\n",
    "    let up: number;\n",
    "    let uc: number;\n",
    "    let us: number;\n",
    "    if (i >= upperCount) {\n",
    "      up = upperCount === 0 ? MAX_PATH + 1 : 0;\n",
    "      uc = 0;\n",
    "      us = 0;\n",
    "    } else {\n",
    "      up = Private.idPathAt(upper, i);\n",
    "      uc = Private.idVersionAt(upper, i);\n",
    "      us = Private.idStoreAt(upper, i);\n",
    "    }\n",
    "\n",
    "    // If the triplets are the same, copy the triplet and continue.\n",
    "    if (lp === up && lc === uc && ls === us) {\n",
    "      id += Private.createTriplet(lp, lc, ls);\n",
    "      continue;\n",
    "    }\n",
    "\n",
    "    // If the triplets are different, the well-ordered identifiers\n",
    "    // assumption means that the lower triplet compares less than\n",
    "    // the upper triplet. The task now is to find the nearest free\n",
    "    // path slot among the remaining triplets.\n",
    "\n",
    "    // If there is free space between the path portions of the\n",
    "    // triplets, select a new path which falls between them.\n",
    "    if (up - lp > 1) {\n",
    "      let np = Private.randomPath(lp + 1, up - 1);\n",
    "      id += Private.createTriplet(np, version, store);\n",
    "      return id.slice();\n",
    "    }\n",
    "\n",
    "    // Otherwise, copy the left triplet and reset the upper count\n",
    "    // to zero so that the loop chooses the nearest available path\n",
    "    // slot after the current lower triplet.\n",
    "    id += Private.createTriplet(lp, lc, ls);\n",
    "    upperCount = 0;\n",
    "  }\n",
    "\n",
    "  // If this point is reached, the lower and upper identifiers share\n",
    "  // the same path but diverge based on the version or store id. It is\n",
    "  // safe to insert anywhere in an extra triplet.\n",
    "  let np = Private.randomPath(1, MAX_PATH);\n",
    "  id += Private.createTriplet(np, version, store);\n",
    "  return id.slice();\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export\n",
    "function createTriplexIds(n: number, version: number, store: number, lower: string, upper: string): string[] {\n",
    "  let ids: string[] = [];\n",
    "\n",
    "    while (ids.length < n) {\n",
    "    let id = createTriplexId(version, store, lower, upper);\n",
    "    ids.push(id);\n",
    "    lower = id;\n",
    "  }\n",
    "\n",
    "  return ids;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Base64 encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export function encodeBase64(input: string): string {\n",
    "    const buffer = Buffer.from(input);\n",
    "    return buffer.toString('base64');\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export function decodeBase64(input: string): string {\n",
    "    return Buffer.from(input, 'base64').toString()\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regular expression (Ian's PR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "const HS_L = '\\uD800';\n",
    "const HS_U = '\\uDBFF';\n",
    "const LS_L = '\\uDC00';\n",
    "const LS_U = '\\uDFFF';\n",
    "const LS_REGEX = new RegExp(`([${LS_L}-${LS_U}])`, 'g');\n",
    "const UNPAIRED_HS_REGEX = new RegExp(\n",
    "    `([${HS_L}-${HS_U}])(?![${LS_L}-${LS_U}])`,\n",
    "    'g',\n",
    ");\n",
    "const PAIRED_LS_REGEX = new RegExp(`X${HS_L}([${LS_L}-${LS_U}])`, 'g');\n",
    "const PAIRED_HS_REGEX = new RegExp(`([${HS_L}-${HS_U}])${LS_L}X`, 'g');\n",
    "\n",
    "export\n",
    "function stripSurrogates(id: string): string {\n",
    "    return id.replace(PAIRED_LS_REGEX, '$1').replace(PAIRED_HS_REGEX, '$1');\n",
    "}\n",
    "\n",
    "export\n",
    "function generateIdString(str: string): string {\n",
    "    str = str.replace(LS_REGEX, `X${HS_L}$1`);\n",
    "    str = str.replace(UNPAIRED_HS_REGEX, `$1${LS_L}X`);\n",
    "    return str;\n",
    "}\n",
    "\n",
    "export\n",
    "function stringToCharCodes(s: string): number[] {\n",
    "    result = new Array<number>();\n",
    "    for (i=0; i<s.length; i++) {\n",
    "        result.push(s.charCodeAt(i))\n",
    "    }\n",
    "    return result\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ids = createTriplexIds(2**10,1,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "timesA = ids.map(item => {\n",
    "    return timeit(100, encodeBase64, [item])\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "meanA = d3array.mean(timesA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "timesB = ids.map(item => {\n",
    "    return timeit(100, generateIdString, [item])\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "meanB = d3array.mean(timesB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "meanB/meanA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "patchIds = createTriplexIds(2**13, 1, 1, ids[0], ids[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "jp-Babel (Node.js)",
   "language": "babel",
   "name": "babel"
  },
  "language_info": {
   "file_extension": ".js",
   "mimetype": "application/javascript",
   "name": "javascript",
   "version": "12.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
